library(vegan)
library(broom)
library(MASS)
library(tidyverse)
## Do Not Run
#wExp <- readRDS("~/RMB/SoilDomestication/Data/wExp.rds")
#wExp %>% 
#  filter(Site == "Arkansas") %>% 
#  ungroup() %>% 
#  saveRDS("~/RMB/SoilDomestication/Data/native_plants_data.rds")
nExp <- readRDS("~/RMB/SoilDomestication/Data/native_plants_data.rds") %>% filter(SampleID != "Arw.22")
tax <- readRDS("~/RMB/SoilDomestication/Data/gg_otus_tax.rds")
long_pcoa <- function(x, samples = "SampleID", otus = "variable", value = "RA", dist = "bray"){
  to_drop <- otus
  metadata <- x %>% 
    dplyr::ungroup() %>% 
    purrr::discard(is.double) %>% 
    dplyr::select_(.dots = paste("-", to_drop)) %>% 
    dplyr::distinct()
  
  wide_table <- x %>% 
    dplyr::select_(samples, otus, value) %>% 
    tidyr::spread_(otus, value, fill = 0)
  
  pc <- vegan::capscale(log2(wide_table[,2:ncol(wide_table)] + 1) ~ 1, dist = dist)
  axes <- dplyr::bind_cols(metadata, dplyr::as_tibble(vegan::scores(pc, choices = c(1:5))$sites))
  eigen_vals <- vegan::eigenvals(pc) / sum(vegan::eigenvals(pc))
  
  return(list(axes = axes, eigen_vals = eigen_vals))
}
raref <- function(x, sampling_depth, value = "value", otus = "variable") {
  rare_values <- data.frame(table(sample(x$`variable`, sampling_depth, replace = T, prob = x$`value`/x$depth)))
  names(rare_values) <- c(otus, "rare_value")
  return(suppressMessages(left_join(x, rare_values, by = otus) %>% replace_na(list(rare_value = 0))))
}
long_adonis <- function(x, samples = "SampleID", otus = "variable", value = "RA", dist = "bray", formula) {
    to_drop <- otus
    metadata <- x %>% 
        dplyr::ungroup() %>% 
        purrr::discard(is.double) %>% 
        dplyr::select_(.dots = paste("-", to_drop)) %>% 
        dplyr::distinct()
  
    wide_table <- x %>% 
        dplyr::select_(samples, otus, value) %>% 
        tidyr::spread_(otus, value, fill = 0)
    permanova <- vegan::adonis(as.formula(paste("wide_table[,2:ncol(wide_table)] ~ ", formula, sep = "")), data = metadata, dist = dist)
    return(permanova)
}

Phyla Stuff

Differential Abundance

safe_glm <- possibly(glm, NA_real_)
safe_glm.nb <- possibly(glm.nb, NA_real_)
nExp2 <- nExp %>% 
  ungroup() %>% 
  mutate(variable = factor(variable)) %>% 
  group_by(SampleID) %>% 
  nest() %>% 
  mutate(rare = map(data, ~raref(., sampling_depth = 15000, value = "value"))) %>% 
  unnest(rare)
pois_glm_plant <- nExp2 %>% 
  filter(Compartment != "Bulk Soil") %>% 
  group_by(variable) %>% 
  filter(sum(rare_value > 0) / n() > 0.1) %>% 
  mutate(host_common_name = relevel(factor(host_common_name), ref = "Rice")) %>% 
  group_by(variable, Compartment) %>% 
  nest() %>%
  mutate(models = map(data, ~suppressWarnings(safe_glm.pois(rare_value ~ host_common_name + offset(log2(depth)), family = poisson, .))))
nb_glm_plant <- nExp2 %>% 
  filter(Compartment != "Bulk Soil") %>% 
  group_by(variable) %>% 
  filter(sum(rare_value > 0) / n() > 0.1) %>% 
  mutate(host_common_name = relevel(factor(host_common_name), ref = "Rice")) %>% 
  group_by(variable, Compartment) %>% 
  nest() %>%
  mutate(models = map(data, ~suppressWarnings(safe_glm.nb(rare_value ~ host_common_name + offset(log2(depth)), .))))
lm_plant <- nExp2 %>% 
  filter(Compartment != "Bulk Soil") %>% 
  group_by(variable) %>% 
  filter(sum(rare_value > 0) / n() > 0.1) %>% 
  mutate(host_common_name = relevel(factor(host_common_name), ref = "Rice")) %>% 
  group_by(variable, Compartment) %>% 
  nest() %>%
  mutate(models = map(data, ~lm(log2((RA * 1000) + 1) ~ host_common_name, .)))

---
title: "Native Plants Analysis"
output: html_notebook
---

```{r}
library(vegan)
library(broom)
library(MASS)
library(tidyverse)
library(broom)
library(tidyMB)
```

```{r}
## Do Not Run
#wExp <- readRDS("~/RMB/SoilDomestication/Data/wExp.rds")
#wExp %>% 
#  filter(Site == "Arkansas") %>% 
#  ungroup() %>% 
#  saveRDS("~/RMB/SoilDomestication/Data/native_plants_data.rds")

nExp <- readRDS("~/RMB/SoilDomestication/Data/native_plants_data.rds") %>% filter(SampleID != "Arw.22") %>% 
  mutate(cpm = round(value * (1000000 / depth)))
tax <- readRDS("~/RMB/SoilDomestication/Data/gg_otus_tax.rds")
```

```{r}
raref <- function(x, sampling_depth, value = "value", otus = "variable") {
  rare_values <- data.frame(table(sample(x$`variable`, sampling_depth, replace = T, prob = x$`value`/x$depth)))
  names(rare_values) <- c(otus, "rare_value")
  return(suppressMessages(left_join(x, rare_values, by = otus) %>% replace_na(list(rare_value = 0))))
}
```


```{r} 
nPC <- tidy_pcoa(nExp %>% group_by(variable) %>% filter(sum(value) > 0) %>% mutate(RA = RA*1000), dist = "bray", value = "RA")
nPC$axes %>% 
  ggplot(aes(MDS1, MDS2, color = host_common_name)) +
  geom_point(size = 3, alpha = 0.7) +
  stat_ellipse(aes(group = Compartment), color = "black") +
  scale_color_brewer(palette = "Set1", direction = -1) +
  theme_minimal() +
  labs(x = paste("PCo1 (", round(nPC$eigen_vals[1] * 100, 2), "%)", sep = ""), y = paste("PCo2 (", round(nPC$eigen_vals[2] * 100, 2), "%)", sep = ""))
```
```{r}
long_adonis(nExp %>% mutate(RA2 = log2((RA*1000) + 1)), value = "RA2", formula = "Compartment * host_common_name")
nExp %>% 
  mutate(RA2 = log2((RA*1000) + 1)) %>% 
  group_by(Compartment) %>% 
  nest() %>% 
  filter(Compartment != "Bulk Soil") %>% 
  mutate(ad = map(data, ~long_adonis(., value = "RA2", formula = "host_common_name")))
```

## Phyla Stuff
```{r}
phyla_abund <- nExp %>% 
  inner_join(tax, by = "variable") %>% 
  group_by(SampleID, Compartment, host_common_name, Phylum2, depth) %>% 
  summarise(phy_total = sum(value)) %>% 
  mutate(prop = (phy_total + 1) / (depth + 1)) %>% 
  group_by(Compartment) %>% 
  mutate(alpha = fitdistr(prop, dbeta, start = list(shape1 = 1, shape2 = 10))$estimate[1],
         beta = fitdistr(prop, dbeta, start = list(shape1 = 1, shape2 = 10))$estimate[2]) %>% 
  mutate(emp_estimate = (phy_total + alpha + 1) / (depth + 1 + alpha + beta))

phy_host_lm <- phyla_abund %>% 
  group_by(Compartment, Phylum2) %>% 
  filter(host_common_name != "Soil") %>% 
  mutate(host_common_name = fct_relevel(host_common_name, "Rice", "Redstem", "Sedge", "Mudplantain")) %>% 
  nest() %>% 
  mutate(models = map(data, ~tidy(lm(log2(emp_estimate) ~ host_common_name, .)))) %>% 
  unnest(models) %>% 
  filter(term != "(Intercept)") %>% 
  mutate(term = gsub("host_common_name", "", term)) %>% 
  group_by(Compartment) %>% 
  mutate(p.adj = p.adjust(p.value, "fdr"))

phy_host_lm1 <- phyla_abund %>% 
  group_by(Compartment, Phylum2) %>% 
  filter(host_common_name != "Soil") %>% 
  mutate(host_common_name = fct_relevel(host_common_name, "Rice", "Redstem", "Sedge", "Mudplantain")) %>% 
  nest() %>% 
  mutate(models = map(data, ~tidy(lm(log2(phy_total + 1) ~ host_common_name, .)))) %>% 
  unnest(models) %>% 
  filter(term != "(Intercept)") %>% 
  mutate(term = gsub("host_common_name", "", term)) %>% 
  group_by(Compartment) %>% 
  mutate(p.adj = p.adjust(p.value, "fdr"))

phy_comp_lm <- phyla_abund %>% 
  group_by(Phylum2) %>% 
  nest() %>% 
  mutate(models = map(data, ~tidy(lm(log2(emp_estimate) ~ Compartment, .)))) %>% 
  unnest(models) %>% 
  filter(term != "(Intercept)") %>% 
  mutate(term = gsub("Compartment", "", term)) %>% 
  mutate(p.adj = p.adjust(p.value, "bon"))

phy_comp_lm %>% 
  filter(p.adj <= 0.05) %>% 
  mutate(Phylum2 = fct_reorder(factor(Phylum2), estimate)) %>% 
  ggplot(aes(Phylum2, estimate)) +
  geom_point() +
  facet_grid(. ~ term)

phy_comp_lm %>% 
  inner_join(phyla_abund %>% group_by(Phylum2, Compartment) %>% summarise(mean_ab = mean(emp_estimate)), by = c(c("term" = "Compartment"), "Phylum2")) %>% 
  mutate(Compartment = fct_relevel(term, "Rhizosphere", "Endosphere")) %>%
  ggplot(aes(Compartment, estimate, group = Phylum2, color = ifelse(p.adj <= 0.05, Compartment, "ns"), shape = ifelse(p.adj <= 0.05, "sig", "ns"), size = log2(mean_ab * 1000))) +
  geom_line(color = 'black', alpha = 0.5, size = 0.5) +
  geom_point(alpha = 0.5) +
  scale_shape_manual(values = c(1, 16)) +
  scale_color_manual(values = c("darkmagenta", "steelblue", "black")) +
  scale_size_continuous(range = c(0,10)) +
  theme_minimal()

phy_host_lm %>% 
  filter(p.adj <= 0.05) %>% ungroup() %>% 
  mutate(Compartment = fct_relevel(Compartment, "Rhizosphere", "Endosphere")) %>% 
  ggplot(aes(Phylum2, term, fill = estimate)) +
  geom_tile() +
  facet_grid(Compartment~.) +
  scale_fill_gradient2(low = "gold", high = "dodgerblue", mid = 'black') +
  theme(axis.text.x = element_text(angle = 30, hjust = 1))
  
phyla_abund %>% 
  group_by(Phylum2) %>% 
  nest() %>% 
  mutate(total = map_dbl(data, ~sum(.x$prop))) %>% 
  top_n(15, total) %>% 
  unnest(data) %>% 
  mutate(Compartment = fct_relevel(Compartment, "Bulk Soil", "Rhizosphere", "Endosphere")) %>% 
  group_by(Compartment, host_common_name, SampleID) %>% 
  nest() %>% 
  group_by(Compartment) %>% 
  arrange(host_common_name) %>% 
  mutate(order = 1:n()) %>% 
  unnest() %>% 
  ggplot(aes(order, prop * 100, fill = Phylum2)) +
  geom_bar(stat = "identity") +
  geom_point(aes(x = order, y = -2, color = host_common_name)) +
  facet_grid(.~Compartment, scales = "free_x") +
  scale_fill_manual(values = c(brewer.pal(11, "RdGy"),brewer.pal(5, "Blues")[-1])) +
  scale_color_brewer(palette = "Set1", direction = -1) +
  labs(x = "", y = "Percent of Reads") +
  theme_minimal() +
  theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), legend.position = "bottom")
```

## Differential Abundance
```{r}
safe_glm <- possibly(glm, NA_real_)
safe_glm.nb <- possibly(glm.nb, NA_real_)

nExp2 <- nExp %>% 
  ungroup() %>% 
  mutate(variable = factor(variable)) %>% 
  group_by(SampleID) %>% 
  nest() %>% 
  mutate(rare = map(data, ~raref(., sampling_depth = 15000, value = "value"))) %>% 
  unnest(rare)

pois_glm_plant <- nExp2 %>% 
  filter(Compartment != "Bulk Soil") %>% 
  group_by(variable) %>% 
  filter(sum(rare_value > 0) / n() > 0.1) %>% 
  mutate(host_common_name = relevel(factor(host_common_name), ref = "Rice")) %>% 
  group_by(variable, Compartment) %>% 
  nest() %>%
  mutate(models = map(data, ~suppressWarnings(safe_glm.pois(rare_value ~ host_common_name + offset(log2(depth)), family = poisson, .))))

nb_glm_plant <- nExp2 %>% 
  filter(Compartment != "Bulk Soil") %>% 
  group_by(variable) %>% 
  filter(sum(rare_value > 0) / n() > 0.1) %>% 
  mutate(host_common_name = relevel(factor(host_common_name), ref = "Rice")) %>% 
  group_by(variable, Compartment) %>% 
  nest() %>%
  mutate(models = map(data, ~suppressWarnings(safe_glm.nb(value ~ host_common_name + offset(log2(depth)), .))))

lm_plant <- nExp2 %>% 
  filter(Compartment != "Bulk Soil") %>% 
  group_by(variable) %>% 
  filter(sum(rare_value > 0) / n() > 0.1) %>% 
  mutate(host_common_name = relevel(factor(host_common_name), ref = "Rice")) %>% 
  group_by(variable, Compartment) %>% 
  nest() %>%
  mutate(models = map(data, ~lm(log2((RA * 1000) + 1) ~ host_common_name, .)))
```

```{r}
pois_glm %>% 
  unnest(map(models, ~tidy(.))) %>% 
  mutate(p.adj = p.adjust(p.value, "fdr")) %>% 
  filter(term != "(Intercept)") %>% 
  mutate(direction = ifelse(estimate > 0, "Weed", "Rice")) %>% 
  filter(p.adj <= 0.05) %>% 
  group_by(Compartment, direction, variable) %>% 
  summarise(n = n()) %>% 
  group_by(Compartment, direction) %>% 
  mutate(total = n()) %>% 
  filter(n == 3) %>% 
  group_by(Compartment, direction, total) %>% 
  summarise(n = n())

nb_glm %>% 
  unnest(map(models, ~tidy(.))) %>% 
  mutate(p.adj = p.adjust(p.value, "fdr")) %>% 
  filter(term != "(Intercept)") %>% 
  mutate(direction = ifelse(estimate > 0, "Weed", "Rice")) %>% 
  filter(p.adj <= 0.05) %>% 
  group_by(Compartment, direction, variable) %>% 
  summarise(n = n()) %>% 
  group_by(Compartment, direction) %>% 
  mutate(total = n()) %>% 
  filter(n == 3) %>% 
  group_by(Compartment, direction, total) %>% 
  summarise(n = n())

lm_plant %>% 
  unnest(map(models, ~tidy(.))) %>% 
  mutate(p.adj = p.adjust(p.value, "fdr")) %>% 
  filter(term != "(Intercept)") %>% 
  mutate(direction = ifelse(estimate > 0, "Weed", "Rice")) %>% 
  filter(p.adj <= 0.05) %>% 
  group_by(Compartment, direction, variable) %>% 
  summarise(n = n()) %>% 
  group_by(Compartment, direction) %>% 
  mutate(total = n()) %>% 
  filter(n == 3) %>% 
  group_by(Compartment, direction, total) %>% 
  summarise(n = n())

library(edgeR)
wide
```

```{r}
significant_otus <- bind_rows(lm_plant %>% mutate(model = "lm"),
          nb_glm_plant %>% mutate(model = "glm.nb")) %>% #,
         # pois_glm_plant %>% mutate(model = "glm.pois")) %>% 
  unnest(map(models, ~tidy(.))) %>% 
  filter(term != "(Intercept)") %>% 
  group_by(Compartment, model) %>% 
  mutate(p.adj = p.adjust(p.value, "fdr")) %>% 
  filter(p.adj <= 0.05) %>% 
  mutate(direction = ifelse(estimate > 0, "Weed", "Rice")) 

significant_otus %>% 
  group_by(Compartment, direction, variable, term) %>% 
  mutate(n = n()) %>% 
  filter(n == 1) %>% 
  ggplot(aes(model, fill = model)) +
  geom_bar() +
  facet_grid(Compartment ~ term)

significant_otus %>% 
  group_by(Compartment, direction, variable, term) %>% 
  mutate(n = n()) %>% 
  ggplot(aes(n, fill = direction)) +
  geom_bar(position = "dodge") +
  facet_grid(Compartment ~ term)

table(paste(significant_otus$model, significant_otus$direction))

significant_otus %>% group_by(Compartment, direction, variable, term) %>% 
  mutate(n = n()) %>% 
  filter(n == 1) %>% 
  filter(model != "lm")
```


```{r}
nExp2 %>% 
  filter(variable == "1108726" & Compartment == "Rhizosphere") %>% 
  ggplot(aes(host_common_name, RA, color = host_common_name)) +
  geom_jitter(height = 0, width = 0.1)

lm_plant %>% filter(variable == "1110303") %>% 
  unnest(map(models, ~tidy(.)))
tax["1091625",]
```

